import scrapy
import time
from novel.items import TypeItem
from novel.items import NovelItem
from novel.items import ChapItem
from novel.items import ContentItem


class SqlBiqugeSpider(scrapy.Spider):
    """Crawl www.biquwx.la from category pages down to chapter text.

    The callbacks form a chain, each one emitting items and scheduling the
    next level of requests:

    ``parse`` (categories) -> ``parse_novel`` (novels of a category)
    -> ``parse_chapter`` (chapter list of a novel)
    -> ``parse_content`` (text of one chapter).

    Values needed by the next level (category text, novel title, chapter
    title) are handed over through ``Request.meta``.
    """

    name = 'sql_biquge'
    allowed_domains = ['www.biquwx.la']
    start_urls = ['http://www.biquwx.la/']

    def parse(self, response):
        """Parse the site navigation bar into novel categories.

        Yields a ``TypeItem`` for every selected navigation entry, followed
        by a request for that category's page (handled by ``parse_novel``).
        """
        # Skip the first two and the last two <li> entries of the nav bar;
        # only the entries in between are treated as categories.
        nav_entries = response.xpath(
            '//div[@class="nav"]//li[position()>2 and position()<last()-1]')
        for entry in nav_entries:
            item = TypeItem()
            item['type_text'] = entry.xpath('./a/text()').get()
            href = entry.xpath('./a/@href').get()
            yield item
            if not href:
                # Nothing to follow; Request(url=None) would raise.
                self.logger.debug('Category %r has no link, not following',
                                  item['type_text'])
                continue
            # urljoin keeps absolute hrefs as they are and resolves relative
            # ones against the current page.
            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.parse_novel,
                                 meta={'type_text': item['type_text']})

    def parse_novel(self, response):
        """Parse the featured novels shown on a category page.

        Yields a ``NovelItem`` per entry of the ``hotcontent`` block, followed
        by a request for the novel's chapter list (handled by
        ``parse_chapter``). The category text comes from ``response.meta``.
        """
        type_text = response.meta['type_text']
        novels = response.xpath('//div[@id="hotcontent"]//div[@class="item"]')
        for novel in novels:
            item = NovelItem()
            item['cover'] = novel.xpath('.//img/@src').get()
            item['novel_title'] = novel.xpath('.//dt/a/text()').get()
            item['novel_author'] = novel.xpath('.//dt/span/text()').get()
            item['type_text'] = type_text
            # Only the first text node under <dd> is kept as the description.
            item['novel_desc'] = novel.xpath(".//dd//text()").get()
            href = novel.xpath('.//dt/a/@href').get()
            yield item
            if not href:
                self.logger.debug('Novel %r has no link, not following',
                                  item['novel_title'])
                continue
            yield scrapy.Request(url=response.urljoin(href),
                                 callback=self.parse_chapter,
                                 meta={'novel_title': item['novel_title']})

    def parse_chapter(self, response):
        """Parse the chapter list of one novel.

        Yields a ``ChapItem`` per chapter link, followed by a request for the
        chapter page (handled by ``parse_content``). The novel title comes
        from ``response.meta``.
        """
        novel_title = response.meta['novel_title']
        chapters = response.xpath('//div[@id="list"]//dd')
        for chapter in chapters:
            href = chapter.xpath('./a/@href').get()
            if not href:
                # A <dd> without a link cannot be turned into a chapter URL.
                self.logger.debug('Skipping chapter entry without link on %s',
                                  response.url)
                continue
            item = ChapItem()
            item['chap_title'] = chapter.xpath('./a/text()').get()
            # Resolve against the page URL instead of concatenating strings:
            # this is correct for bare file names, root-relative paths and
            # absolute URLs alike.
            item['content_url'] = response.urljoin(href)
            item['novel_title'] = novel_title
            # Local time at which this chapter entry was scraped.
            item['ctime'] = time.strftime("%Y-%m-%d %H:%M:%S")
            yield item
            # NOTE(review): the content is linked back to its chapter only by
            # title, which may not be unique across novels - confirm how the
            # pipeline matches ContentItem to ChapItem.
            yield scrapy.Request(url=item['content_url'],
                                 callback=self.parse_content,
                                 meta={'chap_title': item['chap_title']})

    def parse_content(self, response):
        """Parse the text of one chapter page.

        Yields a single ``ContentItem`` whose ``content`` is every direct
        text node of the ``content`` div joined by blank lines. The chapter
        title comes from ``response.meta``.
        """
        paragraphs = response.xpath('//div[@id="content"]/text()').getall()
        item = ContentItem()
        item['chap_title'] = response.meta['chap_title']
        item['content'] = "\n\n".join(paragraphs)
        yield item


